nsample = 100
bonobo_results <- read.delim(paste("/home/ubuntu/bonobo_simulation_results_sample", nsample, "_gene1000.txt", sep = ""))
# mix_prop = 0.5
# bonobo_results <- read.delim(paste("/home/ubuntu/bonobo_simulation_results_mixture_prop", mix_prop, ".txt", sep = ""))
# bonobo_results <- read.delim(paste("/home/ubuntu/bonobo_simulation_results_gene0_prop", mix_prop, ".txt", sep = ""))
# nblock = 2
# bonobo_results <- read.delim(paste("/home/ubuntu/bonobo_simulation_results_block", str(nblock), "_gene100.txt", sep = ""))

bonobo_mse = bonobo_results$mse_bonobo
bonobo_mse_sparse = bonobo_results$mse_bonobo_sparse
lioness_mse = bonobo_results$mse_lioness
spcc_mse = bonobo_results$mse_spcc

# Mean MSE
mean(bonobo_mse)
mean(bonobo_mse_sparse)
mean(lioness_mse)
mean(spcc_mse)

sd(bonobo_mse)
sd(bonobo_mse_sparse)
sd(lioness_mse)
sd(spcc_mse)

# Boxplot of mse

library(ggplot2)
fulldata = data.frame(c(bonobo_mse, lioness_mse, spcc_mse))
colnames(fulldata) = "MSE"
fulldata$Method = c(rep("BONOBO", length(bonobo_mse)), 
                    rep("LIONESS", length(lioness_mse)),
                    rep("SPCC", length(spcc_mse)))

ggplot(fulldata, aes(x=Method, y= MSE, color = Method)) + geom_boxplot() + ggtitle("Simulation 1: sample = 100, genes = 1000")


# Change in MSE with sample size
samples = c(10, 20, 30, 50, 75, 100, 150, 200)
MSE_bonobo = c(0.0987,0.0478,0.0351,0.0234,0.0176,0.0152,0.0127,0.0112)
MSE_bonobo_sparse = c(0.0592,0.0474,0.0451,0.0448,0.0447,0.0449,0.0446,0.0448)
MSE_lioness = c(0.1660,0.1608,0.1582,0.1570,0.1558,0.1559,0.1550,0.1551)
MSE_spcc = c(0.1732,0.1735,0.1732,0.1733,0.1732,0.1739,0.1734,0.1731)
MSE_sweet = c(0.0923,0.0489,0.0378,0.0272,0.0217,0.0195,0.0173,0.0160)

fulldata = data.frame(c(MSE_bonobo, MSE_lioness, MSE_spcc, MSE_sweet))
colnames(fulldata) = "MSE"
fulldata$sample_size = c(samples, samples, samples, samples)
fulldata$Method = c(rep("BONOBO", length(MSE_bonobo)), rep("LIONESS", length(MSE_lioness)), rep("SPCC", length(MSE_spcc)), rep("SWEET", length(MSE_sweet)))

ggplot(fulldata, aes(x=sample_size, y=MSE, group=Method, color=Method)) +
  geom_line(size=1) + ggtitle("Change in MSE with sample size") + xlab("sample size") +
  theme(legend.position = c(0.8,0.6), axis.title = element_text(size = 15), title = element_text(size = 15))

# + theme(legend.position = c(0.8, 0.4))

###################################
cov100 <- read.csv("/home/esaha/BONOBO/data/simulation_simple/simulation_covariance100.csv", sep="")
cov500 <- read.csv("/home/esaha/BONOBO/data/simulation_simple/simulation_covariance500.csv", sep="")
cov1000 <- read.csv("/home/esaha/BONOBO/data/simulation_simple/simulation_covariance1000.csv", sep="")

# Plot covariance distribution for ngenes = 100, 500, 1000
v100 = cov2cor(as.matrix(cov100))
v500 = cov2cor(as.matrix(cov500))
v1000 = cov2cor(as.matrix(cov1000))

diag(v100) = NA
diag(v500) = NA
diag(v1000) = NA

v100 = na.omit(as.numeric(v100))
v500 = na.omit(as.numeric(v500))
v1000 = na.omit(as.numeric(v1000))

correlations = c(v100, v500, v1000)

fulldata = data.frame(correlations)
fulldata$ngenes = c(rep("ngene = 100", length(v100)), rep("ngene = 500", length(v500)), rep("ngene = 1000", length(v1000)))
fulldata$ngenes = factor(fulldata$ngenes, levels = c("ngene = 100", "ngene = 500", "ngene = 1000"))

library(viridis)
library(forcats)

ggplot(fulldata, aes(x=correlations, color=ngenes, fill=ngenes)) +
  geom_histogram(aes(y=after_stat(count)/sum(after_stat(count)))) +
  scale_fill_viridis(discrete=TRUE) +
  scale_color_viridis(discrete=TRUE) +
  theme(
    legend.position="none",
    panel.spacing = unit(0.1, "lines"),
    strip.text.x = element_text(size = 8)
  ) +
  xlab("Correlation values") +
  ylab("Probability (%)") +
  ggtitle("Distribution of Correlation Values by Number of Genes") +
  facet_wrap(~ngenes)

#######################################
# Change in MSE with Mixture Proportion
mix_prop = c(0, 10, 20, 30, 40, 50)
MSE_bonobo = c(0.0152,0.0592,0.0339,0.0392,0.0423,0.0547)
MSE_bonobo_sparse = c(0.0449,0.0477,0.0490,0.0509,0.0492,0.0489)
MSE_lioness = c(0.1559,0.1384,0.1495,0.1551,0.1608,0.1498)
MSE_spcc = c(0.1739,0.1748,0.1737,0.1728,0.1757,0.1693)
MSE_sweet = c(0.0195,0.0607,0.0370,0.0427,0.0464,0.0581)

fulldata = data.frame(c(MSE_bonobo, MSE_lioness, MSE_spcc, MSE_sweet))
colnames(fulldata) = "MSE"
fulldata$mix_prop = c(mix_prop, mix_prop, mix_prop, mix_prop)
fulldata$Method = c(rep("BONOBO", length(MSE_bonobo)), rep("LIONESS", length(MSE_lioness)), rep("SPCC", length(MSE_spcc)), rep("SWEET", length(MSE_sweet)))

library(ggplot2)
ggplot(fulldata, aes(x=mix_prop, y=MSE, group=Method, color=Method)) +
  geom_line(size=1) + ggtitle("Change in MSE with Mixture Proportion") + xlab("% of samples in smaller group") +
  theme(legend.position = c(0.8,0.6), axis.title = element_text(size = 15), title = element_text(size = 15))

########################################

#######################################
# Change in MSE with Proportion of altered samples
mix_prop = c(5, 10, 20, 30, 40, 50, 75, 90, 95)
MSE_bonobo = c(0.0332,0.0276,0.0253,0.0273,0.0239,0.0249,0.0138,0.0132,0.0098)
MSE_bonobo_sparse = c(0.0038,0.0007,0,0,0,0,0,0,0)
MSE_lioness = c(0.1784,0.1690,0.1530,0.1074,0.0695,0.0473,0.0159,0.0058,0.0028)
MSE_spcc = c(0.0561,0.1079,0.1228,0.0806,0.0509,0.0343,0.0115,0.0039,0.0018)
MSE_sweet = c(0,0,0,0,0,0,0,0,0)

fulldata = data.frame(c(MSE_bonobo, MSE_bonobo_sparse, MSE_lioness, MSE_spcc, MSE_sweet))
colnames(fulldata) = "MSE"
fulldata$mix_prop = c(mix_prop, mix_prop, mix_prop, mix_prop, mix_prop)
fulldata$Method = c(rep("BONOBO", length(MSE_bonobo)), rep("sparse BONOBO", length(MSE_bonobo_sparse)), rep("LIONESS", length(MSE_lioness)), rep("SPCC", length(MSE_spcc)), rep("sparse SWEET", length(MSE_sweet)))
fulldata$Method = factor(fulldata$Method, levels = c("BONOBO", "sparse BONOBO", "LIONESS", "SPCC", "sparse SWEET"))

library(ggplot2)
ggplot(fulldata, aes(x=mix_prop, y=MSE, group=Method, color=Method)) +
  geom_line(size = 1) + ggtitle("Change in MSE with Proportion of Altered Samples") + xlab("% of samples altered") +
  scale_color_manual(values=c("red", "blue", "green3", "cyan3", "violet")) +
  theme(legend.position = c(0.8,0.6), axis.title = element_text(size = 15), title = element_text(size = 15))


#######################################
# Change in MSE with delta: simple simulation
delta = c(0.1, 1, 5, 10, 25, 50, 75, 90)/100
MSE_bonobo = c(0.0148,0.0156,0.0160,0.0196,0.0398,0.1022, 0.2188,0.3713)
MSE_bonobo_sparse = c(0.0442,0.0447,0.2021,0.2020,0.2021,0.2019,0.2029,0.2021)
delta_tuned = 0.0152
delta_tuned_bonobo_sparse = 0.0449
lioness = 0.1559
spcc = 0.1739
sweet = 0.0195

fulldata = data.frame(MSE_bonobo)
colnames(fulldata) = "MSE"
fulldata$delta = delta

library(ggplot2)
ggplot(fulldata, aes(x=delta, y=MSE)) + geom_line() + ggtitle("Change in MSE with delta") + xlab("delta") +
  geom_hline(yintercept = delta_tuned, linetype="dashed", color = "red") +
  geom_text(aes(0.4, delta_tuned, label = "BONOBO (delta tuned)", vjust = -1), size = 3, color = "red") +
  geom_hline(yintercept = lioness, linetype="dashed", color = "green3") +
  geom_text(aes(0.4, lioness, label = "LIONESS", vjust = -1), size = 3, color = "green3") +
  geom_hline(yintercept = spcc, linetype="dashed", color = "cyan3") +
  geom_text(aes(0.4, spcc, label = "SPCC", vjust = -1), size = 3, color = "cyan3") +
  geom_hline(yintercept = sweet, linetype="dashed", color = "purple") +
  geom_text(aes(0.8, sweet, label = "SWEET", vjust = -1), size = 3, color = "purple")

fulldata = data.frame(c(MSE_bonobo, MSE_bonobo_sparse))
colnames(fulldata) = "MSE"
fulldata$delta = c(delta, delta)
fulldata$Method = c(rep("BONOBO", length(MSE_bonobo)), rep("sparse BONOBO", length(MSE_bonobo_sparse)))
library(ggplot2)
ggplot(fulldata, aes(x=delta, y=MSE, group=Method, color=Method)) +
  geom_line(linetype = "dashed") + ggtitle("Change in MSE with delta") + xlab("% of samples altered") +
  scale_color_manual(values=c("red", "blue")) + 
  geom_hline(yintercept = delta_tuned, color = "red") +
  geom_text(aes(0.4, delta_tuned, label = "BONOBO (delta tuned)", vjust = -1), size = 3, color = "red") +
  geom_hline(yintercept = delta_tuned_bonobo_sparse, color = "blue") +
  geom_text(aes(0.4, delta_tuned_bonobo_sparse, label = "BONOBO sparse (delta tuned)", vjust = -1), size = 3, color = "blue")
  



#######################################
# Change in MSE with delta: mixture
delta = c(0.1, 0.5, 1, 5, 10, 25, 50, 75, 90)/100
MSE_bonobo = c(0.1023,0.1013,0.1016,0.1027,0.1048,0.1236,0.1821,0.3012,0.4544)
MSE_bonobo_sparse = c(0.0954,0.0596,0.0445,0.1063,0.1089,0.1087,0.1088,0.1088,0.1090)
delta_tuned = 0.0991
delta_tuned_bonobo_sparse = 0.0333
lioness = 0.1075
spcc = 0.1060
sweet = 0.1040

fulldata = data.frame(MSE_bonobo)
colnames(fulldata) = "MSE"
fulldata$delta = delta

library(ggplot2)
ggplot(fulldata, aes(x=delta, y=MSE)) + geom_line() + ggtitle("Change in MSE with delta") + xlab("delta") +
  geom_hline(yintercept = delta_tuned, linetype="dashed", color = "red") +
  geom_text(aes(0.2, delta_tuned, label = "BONOBO (delta tuned)", vjust = -1), size = 3, color = "red") +
  geom_hline(yintercept = lioness, linetype="dashed", color = "green3") +
  geom_text(aes(0.4, lioness, label = "LIONESS", vjust = -1), size = 3, color = "green3") +
  geom_hline(yintercept = spcc, linetype="dashed", color = "cyan3") +
  geom_text(aes(0.6, spcc, label = "SPCC", vjust = -1), size = 3, color = "cyan3") +
  geom_hline(yintercept = sweet, linetype="dashed", color = "purple") +
  geom_text(aes(0.8, sweet, label = "SWEET", vjust = -1), size = 3, color = "purple")

fulldata = data.frame(c(MSE_bonobo, MSE_bonobo_sparse))
colnames(fulldata) = "MSE"
fulldata$delta = c(delta, delta)
fulldata$Method = c(rep("BONOBO", length(MSE_bonobo)), rep("sparse BONOBO", length(MSE_bonobo_sparse)))
library(ggplot2)
ggplot(fulldata, aes(x=delta, y=MSE, group=Method, color=Method)) +
  geom_line(linetype = "dashed") + ggtitle("Change in MSE with delta") + xlab("% of samples altered") +
  scale_color_manual(values=c("red", "blue")) + 
  geom_hline(yintercept = delta_tuned, color = "red") +
  geom_text(aes(0.4, delta_tuned, label = "BONOBO (delta tuned)", vjust = -1), size = 3, color = "red") +
  geom_hline(yintercept = delta_tuned_bonobo_sparse, color = "blue") +
  geom_text(aes(0.4, delta_tuned_bonobo_sparse, label = "BONOBO sparse (delta tuned)", vjust = -1), size = 3, color = "blue")

